# kbailey0133@floridapoly.edu
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(leaflet)
library(broom)
library(here)
## here() starts at /Users/katelynbailey/Documents/GitHub/Bailey_DataViz_Final_Project
# Load the FIFA 18 player table from data/fifa18.csv (path resolved by here()).
fifa <- here("data", "fifa18.csv") %>%
  read_csv()
## Rows: 17076 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): name, nationality, club
## dbl (37): age, overall, potential, acceleration, aggression, agility, balanc...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the table.
fifa %>% head()
# Total count of missing cells across all columns.
fifa %>%
  is.na() %>%
  sum()
## [1] 0
# Column-by-column structure of the loaded table.
str(fifa)
## spc_tbl_ [17,076 × 40] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ name : chr [1:17076] "Cristiano Ronaldo" "L. Messi" "Neymar" "L. Suárez" ...
## $ nationality : chr [1:17076] "Portugal" "Argentina" "Brazil" "Uruguay" ...
## $ club : chr [1:17076] "Real Madrid CF" "FC Barcelona" "Paris Saint-Germain" "FC Barcelona" ...
## $ age : num [1:17076] 32 30 25 30 31 28 26 26 27 29 ...
## $ overall : num [1:17076] 94 93 92 92 92 91 90 90 90 90 ...
## $ potential : num [1:17076] 94 93 94 92 92 91 92 91 90 90 ...
## $ acceleration : num [1:17076] 89 92 94 88 58 79 57 93 60 78 ...
## $ aggression : num [1:17076] 63 48 56 78 29 80 38 54 60 50 ...
## $ agility : num [1:17076] 89 90 96 86 52 78 60 93 71 75 ...
## $ balance : num [1:17076] 63 95 82 60 35 80 43 91 69 69 ...
## $ ball_control : num [1:17076] 93 95 95 91 48 89 42 92 89 85 ...
## $ composure : num [1:17076] 95 96 92 83 70 87 64 87 85 86 ...
## $ crossing : num [1:17076] 85 77 75 77 15 62 17 80 85 68 ...
## $ curve : num [1:17076] 81 89 81 86 14 77 21 82 85 74 ...
## $ dribbling : num [1:17076] 91 97 96 86 30 85 18 93 79 84 ...
## $ finishing : num [1:17076] 94 95 89 94 13 91 13 83 76 91 ...
## $ free_kick_accuracy: num [1:17076] 76 90 84 84 11 84 19 79 84 62 ...
## $ gk_diving : num [1:17076] 7 6 9 27 91 15 90 11 10 5 ...
## $ gk_handling : num [1:17076] 11 11 9 25 90 6 85 12 11 12 ...
## $ gk_kicking : num [1:17076] 15 15 15 31 95 12 87 6 13 7 ...
## $ gk_positioning : num [1:17076] 14 14 15 33 91 8 86 8 7 5 ...
## $ gk_reflexes : num [1:17076] 11 8 11 37 89 10 90 8 10 10 ...
## $ heading_accuracy : num [1:17076] 88 71 62 77 25 85 21 57 54 86 ...
## $ interceptions : num [1:17076] 29 22 36 41 30 39 30 41 85 20 ...
## $ jumping : num [1:17076] 95 68 61 69 78 84 67 59 32 79 ...
## $ long_passing : num [1:17076] 77 87 75 64 59 65 51 81 93 59 ...
## $ long_shots : num [1:17076] 92 88 77 86 16 83 12 82 90 82 ...
## $ marking : num [1:17076] 22 13 21 30 10 25 13 25 63 12 ...
## $ penalties : num [1:17076] 85 74 81 85 47 81 40 86 73 70 ...
## $ positioning : num [1:17076] 95 93 90 92 12 91 12 85 79 92 ...
## $ reactions : num [1:17076] 96 95 88 93 85 91 88 85 86 88 ...
## $ short_passing : num [1:17076] 83 88 81 83 55 83 50 86 90 75 ...
## $ shot_power : num [1:17076] 94 85 80 87 25 88 31 79 87 88 ...
## $ sliding_tackle : num [1:17076] 23 26 33 38 11 19 13 22 69 18 ...
## $ sprint_speed : num [1:17076] 91 87 90 77 61 83 58 87 52 80 ...
## $ stamina : num [1:17076] 92 73 78 89 44 79 40 79 77 72 ...
## $ standing_tackle : num [1:17076] 31 28 24 45 10 42 21 27 82 22 ...
## $ strength : num [1:17076] 80 59 53 80 83 84 64 65 74 85 ...
## $ vision : num [1:17076] 85 90 80 84 70 78 68 86 88 70 ...
## $ volleys : num [1:17076] 88 85 83 88 11 87 13 79 82 88 ...
## - attr(*, "spec")=
## .. cols(
## .. name = col_character(),
## .. nationality = col_character(),
## .. club = col_character(),
## .. age = col_double(),
## .. overall = col_double(),
## .. potential = col_double(),
## .. acceleration = col_double(),
## .. aggression = col_double(),
## .. agility = col_double(),
## .. balance = col_double(),
## .. ball_control = col_double(),
## .. composure = col_double(),
## .. crossing = col_double(),
## .. curve = col_double(),
## .. dribbling = col_double(),
## .. finishing = col_double(),
## .. free_kick_accuracy = col_double(),
## .. gk_diving = col_double(),
## .. gk_handling = col_double(),
## .. gk_kicking = col_double(),
## .. gk_positioning = col_double(),
## .. gk_reflexes = col_double(),
## .. heading_accuracy = col_double(),
## .. interceptions = col_double(),
## .. jumping = col_double(),
## .. long_passing = col_double(),
## .. long_shots = col_double(),
## .. marking = col_double(),
## .. penalties = col_double(),
## .. positioning = col_double(),
## .. reactions = col_double(),
## .. short_passing = col_double(),
## .. shot_power = col_double(),
## .. sliding_tackle = col_double(),
## .. sprint_speed = col_double(),
## .. stamina = col_double(),
## .. standing_tackle = col_double(),
## .. strength = col_double(),
## .. vision = col_double(),
## .. volleys = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Ten nationalities with the most players, ordered from most to fewest.
top_nat <- fifa %>%
  count(nationality, name = "num_players", sort = TRUE) %>%
  slice_head(n = 10)
top_nat

# For those ten nationalities only: mean overall rating, mean potential and
# mean age, ordered by mean overall rating (highest first).
top_nat_stats <- fifa %>%
  semi_join(top_nat, by = "nationality") %>%
  group_by(nationality) %>%
  summarise(across(c(overall, potential, age), mean, .names = "avg_{.col}")) %>%
  arrange(desc(avg_overall))
top_nat_stats
England is the most represented nationality, with Germany, Spain, France, and Argentina rounding out the top five. Brazil is the nationality with the highest average overall rating. Note that Brazil also has the highest average age of these top 10 nationalities. England has the lowest average age, which could be related to it having the largest number of players.
# Scatter plot of age (x) against potential (y), coloured by overall rating.
# The `text` aesthetic carries the player name and club for the tooltip that
# ggplotly() is asked to show below.
fifa_plot <- ggplot(
  fifa,
  aes(
    x = age,
    y = potential,
    color = overall,
    text = paste("Name:", name, "<br>Club:", club)
  )
) +
  geom_point(size = 2, alpha = 0.7) +
  scale_color_gradientn(
    colors = c("#ffe6f0", "#ffb3d9", "#ff80bf", "#ff4da6")
  ) +
  theme_minimal() +
  labs(
    title = "Age against Player Potential (Colored by Overall Rating)",
    x = "Age",
    y = "Potential",
    color = "Overall Rating"
  )

# Convert the static plot into a plotly widget; tooltip is limited to `text`.
interactive_plot <- fifa_plot %>% ggplotly(tooltip = "text")
interactive_plot

# Save the widget as an HTML file (path is relative to the working directory).
htmlwidgets::saveWidget(widget = interactive_plot, file = "fancy_fifa_plot.html")
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
# Read the country shapefile stored under data/ and drop the feature whose
# ISO_A3 code is "ATA" so it is not drawn on the maps.
world_shapes <- here("data", "ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp") %>%
  read_sf() %>%
  filter(ISO_A3 != "ATA")
world_shapes
# Alphabetical list of the distinct country names present in the shapefile.
world_shapes$NAME %>%
  unique() %>%
  sort()
## [1] "Afghanistan" "Albania"
## [3] "Algeria" "Angola"
## [5] "Argentina" "Armenia"
## [7] "Australia" "Austria"
## [9] "Azerbaijan" "Bahamas"
## [11] "Bangladesh" "Belarus"
## [13] "Belgium" "Belize"
## [15] "Benin" "Bhutan"
## [17] "Bolivia" "Bosnia and Herz."
## [19] "Botswana" "Brazil"
## [21] "Brunei" "Bulgaria"
## [23] "Burkina Faso" "Burundi"
## [25] "Cambodia" "Cameroon"
## [27] "Canada" "Central African Rep."
## [29] "Chad" "Chile"
## [31] "China" "Colombia"
## [33] "Congo" "Costa Rica"
## [35] "Côte d'Ivoire" "Croatia"
## [37] "Cuba" "Cyprus"
## [39] "Czechia" "Dem. Rep. Congo"
## [41] "Denmark" "Djibouti"
## [43] "Dominican Rep." "Ecuador"
## [45] "Egypt" "El Salvador"
## [47] "Eq. Guinea" "Eritrea"
## [49] "Estonia" "eSwatini"
## [51] "Ethiopia" "Falkland Is."
## [53] "Fiji" "Finland"
## [55] "Fr. S. Antarctic Lands" "France"
## [57] "Gabon" "Gambia"
## [59] "Georgia" "Germany"
## [61] "Ghana" "Greece"
## [63] "Greenland" "Guatemala"
## [65] "Guinea" "Guinea-Bissau"
## [67] "Guyana" "Haiti"
## [69] "Honduras" "Hungary"
## [71] "Iceland" "India"
## [73] "Indonesia" "Iran"
## [75] "Iraq" "Ireland"
## [77] "Israel" "Italy"
## [79] "Jamaica" "Japan"
## [81] "Jordan" "Kazakhstan"
## [83] "Kenya" "Kosovo"
## [85] "Kuwait" "Kyrgyzstan"
## [87] "Laos" "Latvia"
## [89] "Lebanon" "Lesotho"
## [91] "Liberia" "Libya"
## [93] "Lithuania" "Luxembourg"
## [95] "Macedonia" "Madagascar"
## [97] "Malawi" "Malaysia"
## [99] "Mali" "Mauritania"
## [101] "Mexico" "Moldova"
## [103] "Mongolia" "Montenegro"
## [105] "Morocco" "Mozambique"
## [107] "Myanmar" "N. Cyprus"
## [109] "Namibia" "Nepal"
## [111] "Netherlands" "New Caledonia"
## [113] "New Zealand" "Nicaragua"
## [115] "Niger" "Nigeria"
## [117] "North Korea" "Norway"
## [119] "Oman" "Pakistan"
## [121] "Palestine" "Panama"
## [123] "Papua New Guinea" "Paraguay"
## [125] "Peru" "Philippines"
## [127] "Poland" "Portugal"
## [129] "Puerto Rico" "Qatar"
## [131] "Romania" "Russia"
## [133] "Rwanda" "S. Sudan"
## [135] "Saudi Arabia" "Senegal"
## [137] "Serbia" "Sierra Leone"
## [139] "Slovakia" "Slovenia"
## [141] "Solomon Is." "Somalia"
## [143] "Somaliland" "South Africa"
## [145] "South Korea" "Spain"
## [147] "Sri Lanka" "Sudan"
## [149] "Suriname" "Sweden"
## [151] "Switzerland" "Syria"
## [153] "Taiwan" "Tajikistan"
## [155] "Tanzania" "Thailand"
## [157] "Timor-Leste" "Togo"
## [159] "Trinidad and Tobago" "Tunisia"
## [161] "Turkey" "Turkmenistan"
## [163] "Uganda" "Ukraine"
## [165] "United Arab Emirates" "United Kingdom"
## [167] "United States of America" "Uruguay"
## [169] "Uzbekistan" "Vanuatu"
## [171] "Venezuela" "Vietnam"
## [173] "W. Sahara" "Yemen"
## [175] "Zambia" "Zimbabwe"
# Alphabetical list of the distinct nationality labels used in the FIFA data.
fifa$nationality %>%
  unique() %>%
  sort()
## [1] "Afghanistan" "Albania" "Algeria"
## [4] "Angola" "Antigua & Barbuda" "Argentina"
## [7] "Armenia" "Australia" "Austria"
## [10] "Azerbaijan" "Barbados" "Belarus"
## [13] "Belgium" "Benin" "Bermuda"
## [16] "Bolivia" "Bosnia Herzegovina" "Brazil"
## [19] "Brunei Darussalam" "Bulgaria" "Burkina Faso"
## [22] "Burundi" "Cameroon" "Canada"
## [25] "Cape Verde" "Central African Rep." "Chad"
## [28] "Chile" "China PR" "Colombia"
## [31] "Comoros" "Congo" "Costa Rica"
## [34] "Croatia" "Cuba" "Curacao"
## [37] "Cyprus" "Czech Republic" "Denmark"
## [40] "Dominican Republic" "DR Congo" "Ecuador"
## [43] "Egypt" "El Salvador" "England"
## [46] "Equatorial Guinea" "Eritrea" "Estonia"
## [49] "Ethiopia" "Faroe Islands" "Fiji"
## [52] "Finland" "France" "FYR Macedonia"
## [55] "Gabon" "Gambia" "Georgia"
## [58] "Germany" "Ghana" "Gibraltar"
## [61] "Greece" "Guam" "Guatemala"
## [64] "Guinea" "Guinea Bissau" "Guyana"
## [67] "Haiti" "Honduras" "Hong Kong"
## [70] "Hungary" "Iceland" "Iran"
## [73] "Iraq" "Israel" "Italy"
## [76] "Ivory Coast" "Jamaica" "Japan"
## [79] "Kazakhstan" "Kenya" "Korea DPR"
## [82] "Korea Republic" "Kosovo" "Kuwait"
## [85] "Kyrgyzstan" "Latvia" "Lebanon"
## [88] "Liberia" "Libya" "Liechtenstein"
## [91] "Lithuania" "Luxembourg" "Madagascar"
## [94] "Mali" "Malta" "Mauritania"
## [97] "Mauritius" "Mexico" "Moldova"
## [100] "Montenegro" "Montserrat" "Morocco"
## [103] "Mozambique" "Namibia" "Netherlands"
## [106] "New Caledonia" "New Zealand" "Niger"
## [109] "Nigeria" "Northern Ireland" "Norway"
## [112] "Oman" "Palestine" "Panama"
## [115] "Paraguay" "Peru" "Philippines"
## [118] "Poland" "Portugal" "Puerto Rico"
## [121] "Qatar" "Republic of Ireland" "Romania"
## [124] "Russia" "San Marino" "São Tomé & Príncipe"
## [127] "Saudi Arabia" "Scotland" "Senegal"
## [130] "Serbia" "Sierra Leone" "Slovakia"
## [133] "Slovenia" "Somalia" "South Africa"
## [136] "Spain" "Sri Lanka" "St Kitts Nevis"
## [139] "St Lucia" "Sudan" "Suriname"
## [142] "Swaziland" "Sweden" "Switzerland"
## [145] "Syria" "Tanzania" "Thailand"
## [148] "Togo" "Trinidad & Tobago" "Tunisia"
## [151] "Turkey" "Turkmenistan" "Uganda"
## [154] "Ukraine" "United States" "Uruguay"
## [157] "Uzbekistan" "Venezuela" "Vietnam"
## [160] "Wales" "Zambia" "Zimbabwe"
# Harmonise FIFA nationality labels with the `NAME` values in `world_shapes`
# so that the map joins further down can match them.
#
# - Each pair below is "FIFA label" = "shapefile NAME".
# - "Dominican Republic" is mapped to the shapefile's abbreviated
#   "Dominican Rep."; without it that country silently drops out of the joins.
# - England, Scotland, Wales and Northern Ireland are collapsed into
#   "United Kingdom", the only matching polygon. This overwrites
#   `nationality` in place, so later per-nationality summaries treat the four
#   as a single group.
# - Labels not listed here are left unchanged by recode().
fifa <- fifa %>%
  mutate(nationality = recode(
    nationality,
    "Bosnia Herzegovina" = "Bosnia and Herz.",
    "Brunei Darussalam" = "Brunei",
    "China PR" = "China",
    "DR Congo" = "Dem. Rep. Congo",
    "Czech Republic" = "Czechia",
    "Dominican Republic" = "Dominican Rep.",
    "Equatorial Guinea" = "Eq. Guinea",
    "FYR Macedonia" = "Macedonia",
    "Guinea Bissau" = "Guinea-Bissau",
    "Ivory Coast" = "Côte d'Ivoire",
    "Korea DPR" = "North Korea",
    "Korea Republic" = "South Korea",
    "Republic of Ireland" = "Ireland",
    "Swaziland" = "eSwatini",
    "Trinidad & Tobago" = "Trinidad and Tobago",
    "United States" = "United States of America",
    "England" = "United Kingdom",
    "Scotland" = "United Kingdom",
    "Wales" = "United Kingdom",
    "Northern Ireland" = "United Kingdom"
  ))
# Number of players for each nationality value in `fifa`.
country_counts <- fifa %>%
  count(nationality, name = "count")

# Attach the counts to the country polygons. Polygons whose NAME has no
# matching nationality get NA and are drawn in the `na.value` colour.
world_fifa1 <- world_shapes %>%
  left_join(country_counts, by = c("NAME" = "nationality"))

# Choropleth of player counts. Polygon borders are set with `linewidth`:
# since ggplot2 3.4.0 `size` no longer controls outline width in geom_sf().
fifa_counts_map <- ggplot() +
  geom_sf(
    data = world_fifa1,
    aes(fill = count),
    color = "gray",
    linewidth = 0.15
  ) +
  scale_fill_gradientn(
    colors = c("#ffe6f0", "#ffb3d9", "#ff80bf", "#ff4da6"),
    na.value = "gray90"
  ) +
  labs(
    title = "FIFA18 Player Counts by Country",
    fill = "Number of Players",
    caption = "Source: FIFA18 player data, Shapefile: Natural Earth"
  ) +
  theme_void() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, face = "bold")
  )
fifa_counts_map

# Name the plot explicitly so the saved file does not depend on whichever
# plot happened to be displayed last.
ggsave("fifa_country_counts.png", plot = fifa_counts_map)
## Saving 7 x 5 in image
# Mean overall rating for each nationality value in `fifa`.
country_overall <- fifa %>%
  group_by(nationality) %>%
  summarise(avg_overall = mean(overall))

# Attach the means to the country polygons. Polygons whose NAME has no
# matching nationality get NA and are drawn in the `na.value` colour.
world_fifa2 <- world_shapes %>%
  left_join(country_overall, by = c("NAME" = "nationality"))

# Choropleth of mean overall rating. Polygon borders are set with `linewidth`:
# since ggplot2 3.4.0 `size` no longer controls outline width in geom_sf().
fifa_overall_map <- ggplot() +
  geom_sf(
    data = world_fifa2,
    aes(fill = avg_overall),
    color = "gray",
    linewidth = 0.15
  ) +
  scale_fill_gradientn(
    colors = c("#ffe6f0", "#ffb3d9", "#ff80bf", "#ff4da6"),
    na.value = "gray90"
  ) +
  labs(
    title = "FIFA18 Average Overall by Country",
    fill = "Average Overall",
    caption = "Source: FIFA18 player data, Shapefile: Natural Earth"
  ) +
  theme_void() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, face = "bold")
  )
fifa_overall_map

# Name the plot explicitly so the saved file does not depend on whichever
# plot happened to be displayed last.
ggsave("fifa_country_rating.png", plot = fifa_overall_map)
## Saving 7 x 5 in image
# Linear regression of overall rating on age, potential, strength and
# sprint speed, fitted on every row of `fifa`.
model <- lm(overall ~ age + potential + strength + sprint_speed, data = fifa)

# One row per model term (broom::tidy), without the intercept so that only
# the predictor slopes are plotted.
model_df <- tidy(model) %>% filter(term != "(Intercept)")

# Horizontal bar chart of the slope estimates, smallest at the bottom.
# The bars are horizontal after coord_flip(), so labels are pushed past the
# bar end with `hjust` (to the right for positive estimates, to the left for
# negative ones). The value axis gets extra padding so labels are not clipped.
fifa_lm_coef_plot <- ggplot(
  model_df,
  aes(x = reorder(term, estimate), y = estimate)
) +
  geom_col(fill = "#ffb3d9") +
  geom_text(
    aes(
      label = round(estimate, 2),
      hjust = ifelse(estimate >= 0, -0.2, 1.2)
    ),
    size = 3.5
  ) +
  scale_y_continuous(expand = expansion(mult = 0.15)) +
  theme_minimal() +
  labs(
    title = "Linear Model Coefficients for Player Overall Rating",
    x = "Model Term",
    y = "Estimate"
  ) +
  coord_flip()
fifa_lm_coef_plot

# Name the plot explicitly so the saved file does not depend on whichever
# plot happened to be displayed last.
ggsave("fifa_lm_coef_rating.png", plot = fifa_lm_coef_plot)
## Saving 7 x 5 in image
An increase in any of these variables is associated with an increase in overall rating. Age and potential are strong indicators of overall rating: holding the other variables fixed, older players tend to have higher overall ratings, and a one-point increase in potential is associated with a 0.89-point increase in overall rating. Sprint speed and strength have much smaller effects on the overall rating. This suggests that a player's expected potential and experience are more important than physical abilities like strength and speed.